{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "from glob import iglob\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import pickle\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from keras.models import model_from_yaml\n",
    "from keras.models import Model, Sequential\n",
    "from keras.layers import Input, Dense, Activation\n",
    "from keras.callbacks import ModelCheckpoint, TensorBoard\n",
    "from keras.optimizers import Adam\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "devices = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_file(f, devs, c):\n",
    "    d = pd.read_csv(f)\n",
    "    #print(f)\n",
    "    dev = f[8: f.find('/', 8)]\n",
    "    if dev not in devs:\n",
    "        devs[dev] = {}\n",
    "    if c not in devs[dev]:\n",
    "        devs[dev][c] = 0\n",
    "    devs[dev][c] += d.shape[0]\n",
    "    #print(d.shape)\n",
    "    return d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded, shape: \n",
      "(1032056, 115)\n",
      "Loading mirai data\n",
      "Loaded, shape: \n",
      "(3668402, 115)\n",
      "Loading benign data\n",
      "Loaded, shape: \n",
      "(555932, 115)\n"
     ]
    }
   ],
   "source": [
    "df_gafgyt = pd.concat((read_file(f, devices, 'gafgyt') for f in iglob('../data/**/gafgyt_attacks/*.csv', recursive=True)), ignore_index=True)\n",
    "print('Loaded, shape: ')\n",
    "print(df_gafgyt.shape)\n",
    "df_gafgyt['class'] = 'attack'\n",
    "print('Loading mirai data')\n",
    "df_mirai = pd.concat((read_file(f, devices, 'mirai') for f in iglob('../data/**/mirai_attacks/*.csv', recursive=True)), ignore_index=True)\n",
    "print('Loaded, shape: ')\n",
    "print(df_mirai.shape)\n",
    "df_mirai['class'] = 'attack'\n",
    "print('Loading benign data')\n",
    "df_benign = pd.concat((read_file(f, devices, 'benign') for f in iglob('../data/**/benign_traffic.csv', recursive=True)), ignore_index=True)\n",
    "print('Loaded, shape: ')\n",
    "print(df_benign.shape)\n",
    "df_benign['class'] = 'benign'\n",
    "df = df_benign.append(df_gafgyt.sample(frac=1, random_state=17)).append(df_mirai.sample(frac=1, random_state=17))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Ecobee_Thermostat': {'benign': 110818, 'gafgyt': 512133, 'mirai': 13113},\n",
       " 'SimpleHome_XCS7_1002_WHT_Security_Camera': {'benign': 110687,\n",
       "  'gafgyt': 513248,\n",
       "  'mirai': 46585},\n",
       " 'SimpleHome_XCS7_1003_WHT_Security_Camera': {'benign': 115383,\n",
       "  'gafgyt': 514860,\n",
       "  'mirai': 19528},\n",
       " 'Samsung_SNH_1011_N_Webcam': {'benign': 114672, 'mirai': 52150},\n",
       " 'Danmini_Doorbell': {'benign': 118635, 'gafgyt': 652100, 'mirai': 49548},\n",
       " 'Philips_B120N10_Baby_Monitor': {'benign': 114360,\n",
       "  'gafgyt': 610714,\n",
       "  'mirai': 175240},\n",
       " 'Ennio_Doorbell': {'benign': 110931, 'mirai': 39100},\n",
       " 'Provision_PT_737E_Security_Camera': {'benign': 121575,\n",
       "  'gafgyt': 436010,\n",
       "  'mirai': 62154},\n",
       " 'Provision_PT_838_Security_Camera': {'benign': 114995,\n",
       "  'gafgyt': 429337,\n",
       "  'mirai': 98514}}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "devices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "classes = ['benign', 'attack']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MI_dir_L5_weight scored 0.671857128701193\n",
      "MI_dir_L5_mean scored 0.31344467490142436\n",
      "MI_dir_L5_variance scored 0.2904330511595098\n",
      "MI_dir_L3_weight scored 0.7131694928565342\n",
      "MI_dir_L3_mean scored 0.3485744559122524\n",
      "MI_dir_L3_variance scored 0.3554197256901016\n",
      "MI_dir_L1_weight scored 0.7348305096553736\n",
      "MI_dir_L1_mean scored 0.38542073820594\n",
      "MI_dir_L1_variance scored 0.4097137838932303\n",
      "MI_dir_L0.1_weight scored 0.7045419811521229\n",
      "MI_dir_L0.1_mean scored 0.39986870790696877\n",
      "MI_dir_L0.1_variance scored 0.3866892541675427\n",
      "MI_dir_L0.01_weight scored 0.5940252503261184\n",
      "MI_dir_L0.01_mean scored 0.40596709703312395\n",
      "MI_dir_L0.01_variance scored 0.3785429493515016\n",
      "H_L5_weight scored 0.6718570543847242\n",
      "H_L5_mean scored 0.3134446212662264\n",
      "H_L5_variance scored 0.2904330232719689\n",
      "H_L3_weight scored 0.71316942192271\n",
      "H_L3_mean scored 0.3485743192186741\n",
      "H_L3_variance scored 0.35541963930473003\n",
      "H_L1_weight scored 0.7348304560920709\n",
      "H_L1_mean scored 0.3854202117977467\n",
      "H_L1_variance scored 0.40971349988475725\n",
      "H_L0.1_weight scored 0.7045419222081496\n",
      "H_L0.1_mean scored 0.39987039209236924\n",
      "H_L0.1_variance scored 0.3866869900118808\n",
      "H_L0.01_weight scored 0.5940252001395452\n",
      "H_L0.01_mean scored 0.40597672666070134\n",
      "H_L0.01_variance scored 0.3785394871563603\n",
      "HH_L5_weight scored 0.4132503122673671\n",
      "HH_L5_mean scored 0.16797493861158508\n",
      "HH_L5_std scored 0.09093811300190231\n",
      "HH_L5_magnitude scored 0.11831891006139048\n",
      "HH_L5_radius scored 0.06428722159477847\n",
      "HH_L5_covariance scored 0.01271884132848344\n",
      "HH_L5_pcc scored 0.0007154447321042226\n",
      "HH_L3_weight scored 0.4368453985947054\n",
      "HH_L3_mean scored 0.16806308856263502\n",
      "HH_L3_std scored 0.09551359269014058\n",
      "HH_L3_magnitude scored 0.11828474001480377\n",
      "HH_L3_radius scored 0.06660596582456671\n",
      "HH_L3_covariance scored 0.015804104709870325\n",
      "HH_L3_pcc scored 0.0010020000628323939\n",
      "HH_L1_weight scored 0.4599394728842426\n",
      "HH_L1_mean scored 0.16816196078704151\n",
      "HH_L1_std scored 0.11115600896339461\n",
      "HH_L1_magnitude scored 0.11850900247579346\n",
      "HH_L1_radius scored 0.0713328380179043\n",
      "HH_L1_covariance scored 0.020401559308141613\n",
      "HH_L1_pcc scored 0.0008721414593013415\n",
      "HH_L0.1_weight scored 0.36786803411865693\n",
      "HH_L0.1_mean scored 0.16783598418398119\n",
      "HH_L0.1_std scored 0.18478359617288487\n",
      "HH_L0.1_magnitude scored 0.12021486184913462\n",
      "HH_L0.1_radius scored 0.08412297423555815\n",
      "HH_L0.1_covariance scored 0.019695826273443455\n",
      "HH_L0.1_pcc scored 0.015159129379806941\n",
      "HH_L0.01_weight scored 0.10634341713617965\n",
      "HH_L0.01_mean scored 0.16757188659143038\n",
      "HH_L0.01_std scored 0.288737738670044\n",
      "HH_L0.01_magnitude scored 0.12158381321834946\n",
      "HH_L0.01_radius scored 0.10866771363450678\n",
      "HH_L0.01_covariance scored 0.00014925616416385014\n",
      "HH_L0.01_pcc scored 0.07981656562294928\n",
      "HH_jit_L5_weight scored 0.4132503122673671\n",
      "HH_jit_L5_mean scored 0.15857719700445158\n",
      "HH_jit_L5_variance scored 0.0008922024346647104\n",
      "HH_jit_L3_weight scored 0.43684539859470534\n",
      "HH_jit_L3_mean scored 0.1584775989665805\n",
      "HH_jit_L3_variance scored 0.0014331600736055023\n",
      "HH_jit_L1_weight scored 0.4599394728842447\n",
      "HH_jit_L1_mean scored 0.15461542268642559\n",
      "HH_jit_L1_variance scored 0.05968760818645132\n",
      "HH_jit_L0.1_weight scored 0.3678680341186644\n",
      "HH_jit_L0.1_mean scored 0.13434667703150519\n",
      "HH_jit_L0.1_variance scored 0.13987034584354005\n",
      "HH_jit_L0.01_weight scored 0.10634341713617866\n",
      "HH_jit_L0.01_mean scored 0.12950576731324515\n",
      "HH_jit_L0.01_variance scored 0.14383864495688928\n",
      "HpHp_L5_weight scored 0.020830623031173084\n",
      "HpHp_L5_mean scored 0.16666289961220787\n",
      "HpHp_L5_std scored 0.07193549675449958\n",
      "HpHp_L5_magnitude scored 0.13280508185007422\n",
      "HpHp_L5_radius scored 0.03894412109761251\n",
      "HpHp_L5_covariance scored 0.029119087222797903\n",
      "HpHp_L5_pcc scored 0.024127914837345728\n",
      "HpHp_L3_weight scored 0.02210617111512682\n",
      "HpHp_L3_mean scored 0.16668417538949823\n",
      "HpHp_L3_std scored 0.07385675115228726\n",
      "HpHp_L3_magnitude scored 0.13288212820942197\n",
      "HpHp_L3_radius scored 0.03931251077746232\n",
      "HpHp_L3_covariance scored 0.02839977720295587\n",
      "HpHp_L3_pcc scored 0.022815799491080414\n",
      "HpHp_L1_weight scored 0.024008054462908348\n",
      "HpHp_L1_mean scored 0.16682564869456024\n",
      "HpHp_L1_std scored 0.07839263301122627\n",
      "HpHp_L1_magnitude scored 0.13308309583906675\n",
      "HpHp_L1_radius scored 0.03963106158770883\n",
      "HpHp_L1_covariance scored 0.02678591342854369\n",
      "HpHp_L1_pcc scored 0.016861725605091773\n",
      "HpHp_L0.1_weight scored 0.026029847043809846\n",
      "HpHp_L0.1_mean scored 0.167351553791552\n",
      "HpHp_L0.1_std scored 0.1108426882886053\n",
      "HpHp_L0.1_magnitude scored 0.13445154148101793\n",
      "HpHp_L0.1_radius scored 0.044257204074488944\n",
      "HpHp_L0.1_covariance scored 0.022328731057087617\n",
      "HpHp_L0.1_pcc scored 0.013089283443502811\n",
      "HpHp_L0.01_weight scored 0.023213950295695413\n",
      "HpHp_L0.01_mean scored 0.16842377709801556\n",
      "HpHp_L0.01_std scored 0.13977065371254221\n",
      "HpHp_L0.01_magnitude scored 0.13545766595457048\n",
      "HpHp_L0.01_radius scored 0.05451651936924523\n",
      "HpHp_L0.01_covariance scored 0.01605625967921155\n",
      "HpHp_L0.01_pcc scored 0.025260071832979757\n"
     ]
    }
   ],
   "source": [
    "scored = {}\n",
    "indices = {}\n",
    "shps = {}\n",
    "for cl in classes:\n",
    "    indices[cl] = df['class'] == cl\n",
    "    shps[cl] =  df[indices[cl]].shape[0]\n",
    "        \n",
    "for col in df.columns:\n",
    "    if col == 'class':\n",
    "        continue\n",
    "    num = 0\n",
    "    den = 0\n",
    "    m = df[col].mean()\n",
    "    \n",
    "    for cl in classes:\n",
    "        num += (shps[cl] / df.shape[0]) * (m - df[indices[cl]][col].mean())**2\n",
    "        den += (shps[cl] / df.shape[0]) * df[indices[cl]][col].var()\n",
    "    scored[col] = num / den\n",
    "    print(col + ' scored ' + str(scored[col]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "scored_list = [{'feature': f, 'score': s} for f, s in scored.items()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "scored_list.sort(key=lambda x: x['score'], reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'feature': 'MI_dir_L1_weight', 'score': 0.7348305096553736},\n",
       " {'feature': 'H_L1_weight', 'score': 0.7348304560920709},\n",
       " {'feature': 'MI_dir_L3_weight', 'score': 0.7131694928565342},\n",
       " {'feature': 'H_L3_weight', 'score': 0.71316942192271},\n",
       " {'feature': 'MI_dir_L0.1_weight', 'score': 0.7045419811521229},\n",
       " {'feature': 'H_L0.1_weight', 'score': 0.7045419222081496},\n",
       " {'feature': 'MI_dir_L5_weight', 'score': 0.671857128701193},\n",
       " {'feature': 'H_L5_weight', 'score': 0.6718570543847242},\n",
       " {'feature': 'MI_dir_L0.01_weight', 'score': 0.5940252503261184},\n",
       " {'feature': 'H_L0.01_weight', 'score': 0.5940252001395452},\n",
       " {'feature': 'HH_jit_L1_weight', 'score': 0.4599394728842447},\n",
       " {'feature': 'HH_L1_weight', 'score': 0.4599394728842426},\n",
       " {'feature': 'HH_L3_weight', 'score': 0.4368453985947054},\n",
       " {'feature': 'HH_jit_L3_weight', 'score': 0.43684539859470534},\n",
       " {'feature': 'HH_L5_weight', 'score': 0.4132503122673671}]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scored_list[:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
